# PEDL_2.R

# Contains: Code to generate Figure 1 and Figure A3

# Load libraries
library(pacman)
pacman::p_load(ggplot2, plyr, lubridate)

# Set working directory (YOUR FILEPATH HERE)
setwd("")

#######################################################################
#######################################################################

# Import data
primary <- read.csv(file = "primary_cleaned.csv", header = TRUE)
endline <- readRDS(file = "endline.Rds")

#######################################################################
#######################################################################


#### Figure 1: Study enrollment and timeline

  # Add arm as a factor
  primary$arm.factor <- factor(primary$arm, levels = c("0", "125", "250", "375"))
  
  # Reformat enrollment date as an actual date
  primary$enroll_asDate <- mdy(primary$enrollment_date)
  
  # Enrollment, overall and by group
  primary$t <- 1
  
  primary <- primary[order(primary$enroll_asDate),] 
  primary$cumenroll <- (cumsum(primary$t) / length(primary$t))*100
  
  pdat <- data.frame(
    Date = primary$enroll_asDate,
    Enrollment = primary$cumenroll
  )
  
  # Study exit, overall and by group
  endline$expiry_asDate <- ymd(endline$expiry_date)
  endline$t <- 1
  endline <- endline[order(endline$expiry_asDate),]
  endline$cumenroll <- ((length(endline$t) - cumsum(endline$t)) / length(endline$t))*100
  
  edat <- data.frame(
    Date = endline$expiry_asDate,
    Enrollment = endline$cumenroll
  )


## Combine datasets and plot
f1dat <- rbind(pdat, edat)


f1 <- ggplot() +
  geom_step(data = f1dat, aes(Date, Enrollment)) +
  geom_vline(xintercept=as.numeric(as.Date("2020-04-01")), color = "gray40", lty = 2) +
  geom_vline(xintercept=as.numeric(as.Date("2020-09-30")), color = "gray40", lty = 2) +
  annotate("rect", fill = "gray50", alpha = 0.5, xmin= ymd("2020-04-01"), xmax = ymd("2020-09-30"), ymin = -Inf, ymax = Inf) +
  annotate("rect", fill = "gray80", alpha = 0.5, xmin= ymd("2019-12-10"), xmax = ymd("2020-03-31"), ymin = -Inf, ymax = Inf) +
  annotate("rect", fill = "gray80", alpha = 0.5, xmin= ymd("2020-10-1"), xmax = ymd("2021-03-13"), ymin = -Inf, ymax = Inf) +
  annotate(geom="text", x=ymd("2020-07-01"), y=2, label="Suspension period", color="black", size = 3.8) +
  annotate(geom="text", x=ymd("2020-02-05"), y=2, label="Pre-suspension period", color="black", size = 3.8) +
  annotate(geom="text", x=ymd("2021-01-12"), y=2, label="Post-suspension period", color="black", size = 3.8) +
  scale_x_date(date_breaks = "1 month", date_labels =  "%b %Y") +
  ylab("Percent enrolled") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=60, hjust=1))
f1



#######################################################################
#######################################################################

#### Figure A3: Study enrollment and timeline by arm
  primary <- ddply(primary[order(primary$t),],
                   .(arm.factor), 
                   .fun = transform, 
                   cumenroll_gp = (cumsum(t))
  )
  primary <- ddply(primary,
                   .(arm.factor),
                   transform,
                   group_n = length(cumenroll_gp))
  primary$cumenroll_group <- (primary$cumenroll_gp / primary$group_n)*100
  
  pdat2 <- data.frame(
    Date = primary$enroll_asDate,
    Enrollment = primary$cumenroll_group,
    Arm = primary$arm.factor
  ) 
  
  endline <- ddply(endline[order(endline$t),],
                   .(arm.factor), 
                   .fun = transform, 
                   cumenroll_gp = (cumsum(t))
  )
  endline <- ddply(endline,
                   .(arm.factor),
                   transform,
                   group_n = length(cumenroll_gp))
  endline$cumenroll_group <- ((endline$group_n - endline$cumenroll_gp) / endline$group_n)*100
  
  edat2 <- data.frame(
    Date = endline$expiry_asDate,
    Enrollment = endline$cumenroll_group,
    Arm = primary$arm.factor
  )
  
  
## Combine datasets and plot
f1dat2 <- rbind(pdat2, edat2)
  
f1dat2$arm.dollar <- factor(ifelse(f1dat2$Arm == 0, "Control", 
                                    ifelse(f1dat2$Arm == 125, "$1.7",
                                          ifelse(f1dat2$Arm == 250, "$3.4", "$5.1"))),
                            levels = c("Control", "$1.7", "$3.4", "$5.1"))

fa3 <- ggplot(data = f1dat2, aes(Date, Enrollment, group = arm.dollar)) +
  geom_step(aes(color = arm.dollar)) +
  geom_vline(xintercept=as.numeric(as.Date("2020-04-01")), color = "red", lty = 2) +
  geom_vline(xintercept=as.numeric(as.Date("2020-09-30")), color = "red", lty = 2) +
  annotate("rect", fill = "pink", alpha = 0.5, xmin= ymd("2020-04-01"), xmax = ymd("2020-09-30"), ymin = -Inf, ymax = Inf) +
  annotate("rect", fill = "slategray1", alpha = 0.5, xmin= ymd("2019-12-10"), xmax = ymd("2020-03-31"), ymin = -Inf, ymax = Inf) +
  annotate("rect", fill = "slategray1", alpha = 0.5, xmin= ymd("2020-10-1"), xmax = ymd("2021-03-13"), ymin = -Inf, ymax = Inf) +
  annotate(geom="text", x=ymd("2020-07-01"), y=2, label="Suspension period", color="red", size = 3.3) +
  annotate(geom="text", x=ymd("2020-02-05"), y=2, label="Pre-suspension period", color="black", size = 3.3) +
  annotate(geom="text", x=ymd("2021-01-12"), y=2, label="Post-suspension period", color="black", size = 3.3) +
  scale_x_date(date_breaks = "1 month", date_labels =  "%b %Y") +
  ylab("Percent enrolled") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=60, hjust=1),
        legend.title = element_blank())

fa3